
### -------------------------------------------------- #
### ---- Download data (and initial cleaning) ---- 
### -------------------------------------------------- #

# *Standardizes names for DVs 

# Read one merged match file from Temp_Data/.
#
# Args:
#   match:    value inserted into the file name after "_m".
#   dedup:    value inserted into the file name after "_dedup".
#   csv:      FALSE (default) reads "<base>.dta" with read_dta();
#             TRUE reads "<base>.csv" with fread() and then cleans it (below).
#   date_txt: prefix of the file name (default "2-22").
#   em:       accepted for backward compatibility; see NOTE(review) below.
#
# Returns:
#   The data as read. For csv = TRUE the column names are made syntactically
#   valid and unique, and columns are coerced to numeric.
get_rd_data <- function(match, dedup, csv=FALSE, date_txt="2-22", em=FALSE)
{
  # NOTE(review): the em == FALSE and em == TRUE branches used to build exactly
  # the same path, so `em` has no effect on which file is read -- confirm
  # whether a separate file was intended for em = TRUE.
  fp_base <- paste0("Temp_Data/", date_txt, "_comb_tfa_matches_clean_merge_m",
                    match, "_dedup", dedup)

  # Stata file: returned exactly as read
  if (!csv) {
    return(read_dta(paste0(fp_base, ".dta")))
  }

  # CSV file
  dat <- fread(paste0(fp_base, ".csv"))
  # Verify unique variable names
  dat %<>% setNames(make.names(names(.), unique = TRUE))
  # Coerce to numeric. When a state_st column is present, columns whose name
  # contains "state", "State" or "ac1_type" are left untouched.
  # (The bare function replaces the deprecated funs() wrapper.)
  if ("state_st" %in% names(dat)) {
    dat %<>% mutate_at(vars(-contains("state"), -contains("State"), -contains("ac1_type")),
                       as.numeric)
  } else {
    dat %<>% mutate_all(as.numeric)
  }

  dat
}



### -------------------------------------------------- #
### ---- Subset Cleaned Data Files  ---- 
### -------------------------------------------------- #

### Subset Data
# Apply the sample restrictions to a data set.
#
# Args:
#   dat:         data frame; must contain `uscitizen`, plus whichever of
#                state_st, state_uni, state_ca_clean, state_cell, finished,
#                appyear and vf_match_state_st the chosen options touch.
#   dedup:       dedup version; 6 and 10 get version-specific handling.
#   drop_2008:   TRUE drops rows with appyear == 2008.
#   subset_d6:   TRUE (and dedup == 6) keeps only rows with finished == 1.
#   min_ay, max_ay: inclusive bounds on appyear; NULL skips the bound.
#   dropStates:  values of stateVar to remove; NULL skips.
#   keepStates:  values of stateVar to keep; NULL skips.
#   rm_na_state: TRUE removes rows with empty/NA state columns
#                (state_st for dedup 6; state_uni, state_ca_clean and
#                state_cell for dedup 10).
#
# Returns:
#   The filtered data. For dedup == 6 a `stateVar` column (a copy of
#   vf_match_state_st) is added.
subset_rd_data <- function(dat, dedup, drop_2008=FALSE, subset_d6=TRUE, min_ay=NULL, max_ay=NULL,
                           dropStates=NULL, keepStates=NULL, rm_na_state=FALSE)
{
  # Drop non-citizens (rows with missing uscitizen are dropped as well)
  dat %<>% filter(!is.na(uscitizen) & uscitizen==1)

  # Remove obs w/out state info
  if (rm_na_state) {
    if (dedup == 6) dat %<>% filter(state_st != "" & !is.na(state_st))
    if (dedup == 10) {
      dat %<>%
        filter(state_uni != "" & !is.na(state_uni)) %>%
        filter(state_ca_clean != "" & !is.na(state_ca_clean)) %>%
        filter(state_cell != "" & !is.na(state_cell))
    }
  }

  # dedup 6 only: optionally keep rows with finished == 1
  if (dedup == 6 && subset_d6) {
    dat %<>% filter(finished == 1)
  }

  # Application-year window (inclusive on both ends)
  if (!is.null(min_ay)) dat %<>% filter(appyear >= min_ay)
  if (!is.null(max_ay)) dat %<>% filter(appyear <= max_ay)
  # Drop 2008 if desired
  if (drop_2008) dat %<>% filter(appyear != 2008)

  # State filters operate on stateVar, which this function only creates for
  # dedup == 6.
  if (dedup == 6) dat %<>% mutate(stateVar = vf_match_state_st)
  if (!is.null(dropStates) || !is.null(keepStates)) {
    # Fail with a clear message instead of an "object not found" error raised
    # from inside filter() when no stateVar column is available.
    if (!"stateVar" %in% names(dat)) {
      stop("dropStates/keepStates need a `stateVar` column, which is only ",
           "created when dedup == 6 (got dedup = ", dedup, ").",
           call. = FALSE)
    }
    if (!is.null(dropStates)) dat %<>% filter(!stateVar %in% dropStates)
    if (!is.null(keepStates)) dat %<>% filter(stateVar %in% keepStates)
  }

  dat
}



### -------------------------------------------------- #
### ---- Open & Subset Cleaned Data Files  ---- 
### -------------------------------------------------- #

# Read a data file with get_rd_data() and hand it straight to
# subset_rd_data().
#
# match, dedup, csv, date_txt and em are forwarded to get_rd_data(); dedup and
# every remaining argument are forwarded to subset_rd_data(). Note that
# subset_d6 defaults to FALSE here, whereas subset_rd_data() itself defaults
# it to TRUE.
#
# Returns whatever subset_rd_data() returns.
open_data <- function(match, dedup, min_ay=NULL, max_ay=NULL,
                      subset_d6=FALSE, drop_2008=FALSE,
                      csv=FALSE, dropStates=NULL, keepStates=NULL, rm_na_state=FALSE,
                      date_txt="2-22", em=FALSE)
{
  # Step 1: read the file
  raw <- get_rd_data(match = match, dedup = dedup, csv = csv,
                     date_txt = date_txt, em = em)

  # Step 2: apply the sample restrictions
  subset_rd_data(
    dat         = raw,
    dedup       = dedup,
    drop_2008   = drop_2008,
    subset_d6   = subset_d6,
    min_ay      = min_ay,
    max_ay      = max_ay,
    dropStates  = dropStates,
    keepStates  = keepStates,
    rm_na_state = rm_na_state
  )
}



### -------------------------------------------------- #
### ---- Open Calculated Results ---- 
### -------------------------------------------------- #

# Load the two saved results files, stack them, and reshape for plotting.
#
# Args:
#   wd: directory to switch to before reading; the .rds paths below are
#       relative to it (default "../").
#
# Returns:
#   The stacked, de-duplicated results with
#     * period, dvType and elections labels derived from substrings of `dv`,
#     * the cace/itt columns restacked to one row per calcType ("CACE"/"ITT")
#       with the estimate in `coef` and its standard error in `stderr`,
#     * calcType and period converted to factors.
get_results <- function(wd="../")
{
  # NOTE(review): the working directory is changed and never restored, so it
  # stays at `wd` after this function returns, and repeated calls with a
  # relative `wd` keep moving it. Confirm callers rely on this before changing.
  setwd(wd)

  # Stack both saved result sets and drop exact duplicate rows
  res_files <- c("Analysis/Results/results_robust.rds",
                 "Analysis/Results/results_robust_allDups.rds")
  dat <- distinct(rbind(readRDS(res_files[1]), readRDS(res_files[2])))

  # Plot labels derived from the dv name. Within each mutate() the
  # assignments run top to bottom, so a later match overrides an earlier one.
  dat <- dat %>%
    # Period of the dv
    mutate(
      period = ifelse(str_detect(dv, "postAPP"), "postApp", NA),
      period = ifelse(str_detect(dv, "preAPP"), "pre", period),
      period = ifelse(str_detect(dv, "postTFA"), "postTFA", period),
      period = ifelse(str_detect(dv, "post1Y"), "post1Y", period)
    ) %>%
    # Type of the dv
    mutate(
      dvType = ifelse(str_detect(dv, "prop_vote"), "Proportion Vote", NA),
      dvType = ifelse(str_detect(dv, "ever_vote"), "Ever Vote", dvType),
      dvType = ifelse(str_detect(dv, "regist"), "Registration", dvType)
    ) %>%
    # Elections covered: year range first, then an election-type prefix
    mutate(
      elections = ifelse(str_detect(dv, "12to5"), "2012-2015", "2004-2015"),
      elections = ifelse(str_detect(dv, "10to5"), "2010-2015", elections),
      elections = ifelse(str_detect(dv, "08to5"), "2008-2015", elections),
      elections = ifelse(str_detect(dv, "mid"),
                         str_c("Midterm Elections, ", elections), elections),
      elections = ifelse(str_detect(dv, "pres"),
                         str_c("Presidential Elections, ", elections), elections),
      elections = ifelse(str_detect(dv, "offcycle"),
                         str_c("Offcycle Elections, ", elections), elections),
      # No election-type tag in the dv name
      elections = ifelse(!(str_detect(dv, "mid") | str_detect(dv, "offcycle") |
                             str_detect(dv, "pres")),
                         str_c("Presidential and Midterm Elections, ", elections),
                         elections),
      # Registration rows carry no elections label
      elections = ifelse(dvType == "Registration", NA, elections)
    )

  # Restack: long over the cace/itt columns, keep only the tau.bc and se.rb
  # entries, then widen again so each row has a coef and a stderr column
  dat <- dat %>%
    gather(key = "key", value = "value", contains("cace"), contains("itt")) %>%
    filter(str_detect(key, "tau.bc|se.rb")) %>%
    mutate(
      calcType = ifelse(str_detect(key, "cace."), "CACE", "ITT"),
      estType  = ifelse(str_detect(key, "tau.bc"), "coef", "stderr")
    ) %>%
    select(-key) %>%
    spread(key = "estType", value = "value") %>%
    mutate(
      calcType = factor(calcType, levels = c("CACE", "ITT")),
      period   = factor(period, levels = c("pre", "postApp", "post1Y", "postTFA"))
    )

  dat
}


### -------------------------------------------------- 
### *** Subset ---- 
### -------------------------------------------------- 
# Pick one specification out of the stacked results table.
#
# Args:
#   dat:        results table with columns dedup, min_ay, max_ay, dvType,
#               period, elections, drop_2008 and keepStates.
#   elec:       value `elections` must equal; NA skips this filter.
#   dv:         "Ever Vote", "Proportion Vote" or "Registration"; any other
#               value leaves dvType unfiltered.
#   states:     value `keepStates` must equal (default "allStates").
#   minAppYear: value `min_ay` must equal; NA keeps rows where min_ay is NA.
#   maxAppYear: value `max_ay` must equal; NA keeps rows where max_ay is NA.
#   drop08:     value `drop_2008` must equal.
#   prd:        periods to keep.
#   d:          dedup value(s) to keep (required; no default).
#
# Returns: the rows of `dat` matching every criterion.
subset_res <- function(dat, elec, dv,
                       states="allStates",
                       minAppYear=2010, maxAppYear=NA,
                       drop08=TRUE, prd=c("pre", "postApp", "postTFA"),
                       d)
{
  # Dedup version(s)
  dat <- filter(dat, dedup %in% d)

  # Minimum application year (/cohort): exact match, or NA-only when unset
  if (is.na(minAppYear)) {
    dat <- filter(dat, is.na(min_ay))
  } else {
    dat <- filter(dat, min_ay == minAppYear)
  }

  # Maximum application year: same rule
  if (is.na(maxAppYear)) {
    dat <- filter(dat, is.na(max_ay))
  } else {
    dat <- filter(dat, max_ay == maxAppYear)
  }

  # DV type: only the three known labels trigger a filter
  for (type_label in c("Ever Vote", "Proportion Vote", "Registration")) {
    if (dv == type_label) {
      dat <- filter(dat, as.character(dvType) == type_label)
    }
  }

  # Period
  dat <- filter(dat, period %in% prd)

  # Elections (skipped when elec is NA)
  if (!is.na(elec)) {
    dat <- filter(dat, elections == elec)
  }

  # Remaining specification switches
  filter(dat, drop_2008 == drop08, keepStates == states)
}















